; DOS .COM image, assembled for load offset 0x100.
; The code never initializes ax/bx/si/di/sp itself; it relies on the entry
; values listed in the comment on the next line. The palette loop's
; "cx=0x2ff" note additionally implies cl=0xff on entry.
org 100h   ; assume ax=bx=0 si=0x100 di=sp=-2

  ; ah is assumed 0, so this yields ax=0x0013 (BIOS "set video mode 0x13")
  mov al,0x13
  int 0x10     ; set 320x200 mode

;Palette: 8 color gradients [21$]
; One loop feeds the VGA DAC: the first byte goes to port 0x3c8 (write
; index), every later byte goes to port 0x3c9 (color data), because dl is
; switched to 0xc9 at the end of the first pass.
; bx accumulates +0x34 per pass and is NOT reset afterwards; the frame loop
; further down uses whatever is left in bx as its "time" value.
  mov dx,0x3c8
  mov ch,2     ; cx=0x2ff
P mov al,0x34  ; ah=0
  add bx,ax    ; bx += 0x34 (ah is 0 here: initially, and after shr ax,9)
  ror al,cl    ; rotate the constant by the (masked) loop counter
  mul bh       ; ax = al * bh
  shr ax,9     ; keep the top bits of the product; ah becomes 0 again
  out dx,al    ; init: index=0 (first pass: bh=0 so al=0, written to 0x3c8)
  mov dl,0xc9  ; all following passes write to the DAC data port
  loop P       ; leaves cx=0

  ; si = 0x4000 serves two purposes: it is the segment loaded into gs (table
  ; storage) and the byte offset added to a table index to read cos instead
  ; of sin (a quarter of the 64 KiB table).
  mov si,0x4000
  mov gs,si    ; si=gs=0x4000: table segment, sin->cos phase

  ; Pushes 0x9ff6 (popped into es after the table is built). The push also
  ; leaves sp=-4, which the loop below uses as its step constant.
  push 0xa000 - 160/16 ; sp=-4

;Sine table: 16384 float32 entries (-1..1)
; The word at [di] (di=-2, assumed to contain 0 at entry) is the loop
; counter t. It advances by 4 per pass until it wraps back to 0, i.e. 16384
; passes. Each pass stores cos(t*2pi/65536) at gs:[t+0x4000], so that
; afterwards gs:[x] reads as sin(x) and gs:[x+0x4000] reads as cos(x).
  fninit       ; [di]=0
S mov bp,[di]               ; bp = t (byte offset of this entry)
  fild word[di]             ;| t
  fidiv word[c65536div2pi]  ;| T=t/65536*2pi
  fcos                      ;| cosT
  fstp dword[gs:bp+si]      ;| to [bp+0x4000]
  sub [di],sp  ; +4          (sp=-4, so subtracting it adds 4)
  jnz S        ; bp=-4       (value of bp on exit; [di] is 0 again)

  ; es = 0xa000 - 10 paragraphs: video memory shifted by 160 bytes.
  ; sp is back to -2 after this pop.
  pop es       ; es=0x9ff6: centered screen segment

; Frame loop. Invariants on entry to M/X (established by the setup code):
;   bx = time (only bh is incremented per frame), si = 0x4000, bp = -4,
;   sp = -2, ch = 0, es = screen segment, gs = sin/cos table segment,
;   FPU stack empty.
; ds:[si] .. ds:[si+7] (ds:0x4000..0x4007) is used as scratch memory.
M:

;For each pixel: find dX,dY,dZ and initialize X,Y,Z
;bx=time di=pixel_address si=0x4000
; 320*0xcccd = 0x1000040, so the high word of di*0xcccd advances by about
; 0x100 per 320 pixels: dh follows the row, dl the position within the row.
; The ray direction words are not computed explicitly; they are read back
; from overlapping byte pairs of the register image that pusha writes to
; the stack (see the layout diagram below).
X mov ax,0xcccd
  mul di       ; dx:ax = di * 0xcccd
  mov cl,0x7c  ; cl:dh=dZ=0x7c??  (cx=0x007c is also the march step budget)
  add dh,0x9f  ; dh:dl=dY         (presumably recenters the row; wraps mod 256)
  xchg ax,bx   ; dl:bh=dX ax=time
  pusha        ; -10 -9 -8 -7 -6 -5 -4 -3
               ;  bl bh dl dh cl ch al ah
               ;     ( dX )      0  (time)
               ;        ( dY )
               ;           ( dZ )

  ; Ray origin, kept in (ax, dx, bx) = (X, Y, Z) throughout the march.
  xchg ax,bx   ; bx=Z=time
  mov ax,si    ; ax=X=0x4000
  cwd          ; dx=Y=0

; Load dX,dY,dZ and rotate dX,dZ by time
; The loop body runs four times. The inner exit test toggles si between
; 0x4000 (cos) and 0 (sin); the outer test toggles bp between -4 and -2 and
; branches on the parity of bp's low byte (0xfe: odd -> loop, 0xfc: even ->
; fall through). On exit si=0x4000 and bp=-4 again.
                     ;  si=0x4000 si=0      si=0x4000 si=0
                     ;  bp=-4     bp=-4     bp=-2     bp=-2
L fild word[bp-5]    ;| dX      | dX      | dZ      | dZ
  fmul dword[gs:bx+si];|dX*cosT | dX*sinT | dZ*cosT | dZ*sinT
  xor si,ax          ; ax=0x4000: flips si between 0x4000 and 0
  jz L
  xor bp,2
  jpo L              ;| dZ*s dZ*c dX*s dX*c ; si=0x4000 bp=-4
  fsubp st3,st0      ;| dZ*c dX*s (dX*c-dZ*s)
  faddp              ;| dX=dZ*c+dX*s dZ=dX*c-dZ*s
  fild word[bp-4]    ;| dY dX dZ

  shl bx,3     ;Z=time*8

; Ray march. Each pass evaluates the gyroid at (X,Y,Z), converts the result
; into a step length D and advances the ray by D along (dX,dY,dZ).
; si stays 0x4000 for the whole march; the three coordinates are rotated
; through ax/dx/bx with xchg so that the same instructions handle each axis.
Z:
;Compute the distance to the gyroid
  fldl2e       ;| k=1.442695
; Three passes; bp counts -4 -> -1 and the parity of its low byte ends the
; loop (0xfd, 0xfe odd -> repeat; 0xff even -> exit).
G fld dword[gs:bx+si] ; cos of the coordinate currently in bx
  xchg ax,dx   ;| cosZ | cosY | cosX
  xchg ax,bx   ; ax=X dx=Y bx=Z -> ax=Z dx=X bx=Y
  fmul dword[gs:bx]   ; sin of the coordinate now in bx
  inc bp       ;| cosZ*sinY | cosY*sinX | cosX*sinZ
  jpo G        ; bp=-1  (registers are back to ax=X dx=Y bx=Z)
  faddp
  faddp        ;| d=cosZ*sinY+cosY*sinX+cosX*sinZ
  fst qword[si]; store d as float64 (its sign bit at [si+7] is read at E)
  fabs         ;| |d| k
  fsubp st1,st0;| D=k-|d|
  fmul dword[cDistFactor] ; scale D into coordinate units

;Advance ray by distance
; Three passes; bp counts -1 -> -4, again ended by parity (0xfe, 0xfd odd ->
; repeat; 0xfc even -> exit). Each pass multiplies one direction component
; by D, converts it to an integer in the scratch dword and adds its low word
; to the matching coordinate. "fst st4" re-appends the component at the
; bottom of the FPU stack, so after three passes the stack order and the
; register assignment are both restored.
               ;| D dY dX dZ
A fxch st1     ;| dY D dX dZ
  fst st4      ;| dY D dX dZ dY
  fmul st1     ;| dY*D D dX dZ dY
  fistp dword[si] ; scratch = round(component*D) ;| D dX dZ dY
  xchg ax,dx   ; ax=X dx=Y bx=Z -> ax=Y dx=X bx=Z
;  sar dword[si],2
  add ax,[si]  ; Y+=dY*D | X+=dX*D | Z+=dZ*D
  and al,0xfc  ; align to a multiple of 4 (for sine table)
  xchg ax,bx   ; ax=Y dx=X bx=Z -> ax=Z dx=X bx=Y
  dec bp
  jpo A        ; bp=-4  (registers are back to ax=X dx=Y bx=Z)

;Close enough?
; Only the top byte of the float32 image of the scaled D is compared, and
; the compare is signed: the branch is taken when bits(D) < 0x3d000000,
; i.e. scaled D < 2^-5 = 0.03125, and also whenever D is negative (top byte
; >= 0x80). NOTE(review): the original note said "D<0.125"; 0x3d000000 is
; 0.03125 as a float32. The threshold applies after the cDistFactor scaling.
  fstp dword[si]; store D as float32 ;| dY dX dZ
  cmp byte[si+3],0x3d ; cmp bits(D),0x3d000000
  jl E         ; hit if scaled D < 2^-5 or D < 0
  add cx,bp    ; cx -= 4 (bp=-4)
  jnz Z        ; cx starts at 0x7c, so at most 31 passes

;Draw pixel
; Color = 2 * (remaining step budget in cl) + sign bit of the last d.
E fcompp       ; pop two of the three direction values
  fstp st0     ;|  (FPU stack empty again)
  xchg ax,cx   ; al = remaining step budget (multiple of 4)
;  or al,3
  shl byte[si+7],1 ; carry = sign of d
  adc al,al   ; color = sign of d  (al = 2*al + carry)
  stosb        ; write the pixel at es:di
  popa         ; restores the registers saved at X, including di and cx,
               ; so the increment done by stosb is discarded
  xchg ax,bx   ; undo the exchange made before pusha: bx = time again

;Next pixel
  inc di
  jnz X        ; visit all 65536 values of di

;Next frame
  inc bh       ; time++  (time advances by 0x100 per frame)
  in al,0x60   ; esc check  (read keyboard scan code)
  dec al       ; scan code 1 -> 0
  jnz M        ; keep rendering unless the last scan code was 1
  ret          ; sp=-2 here; returns through the word assumed 0 at [sp]

; Constants read through ds by the FPU code above.
; Divisor used when building the sine table (integer operand of fidiv).
c65536div2pi: dw 10430 ; 65536/2pi
; Factor applied to D = k-|d| before the ray is advanced (float32).
cDistFactor: dd 0.22   ; (0.75 (Lipschitz constant) / 44700 (avg dir length)) * 65536/2pi
;cOffset: dd 1.5


;; Palette test
;  push 0xa000
;  pop es
;  xor di,di
;  xor ax,ax
;Y stosb
;  inc al
;  jnz Y
;  add di,64
;  jns Y
;
;  xor ax,ax
;  int 0x16
;  ret

